This module builds on code contained in Coronavirus_Statistics_USAF_v005.Rmd. This file includes the latest code for analyzing data from USA Facts. USA Facts maintains data on cases and deaths by county for coronavirus in the US. Downloaded data are unique by county with date as a column and a separate file for each of cases, deaths, and population.
The intent of this code is to source updated functions that allow for readRunUSAFacts() to be run to obtain, read, process, and analyze data from USA Facts.
The tidyverse library is loaded, and the functions used for CDC daily processing are sourced. Additionally, specific functions for USA Facts are also sourced:
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Functions are available in source file
source("./Generic_Added_Utility_Functions_202105_v001.R")
source("./Coronavirus_CDC_Daily_Functions_v001.R")
source("./Coronavirus_USAF_Functions_v001.R")
Further, the mapping file specific to USA Facts is sourced:
source("./Coronavirus_USAF_Default_Mappings_v001.R")
Updated functions for diagnoseClusters(), createDetailedSummaries(), createSummary(), and helperSummaryMap() are added to Coronavirus_USAF_Functions_v001.R. These functions should be checked for consistency with state-level data with just a single copy kept later.
The functions are tested on previously downloaded data, with results cached:
urlMapper[["usafCase"]] <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
urlMapper[["usafDeath"]] <- "https://static.usafacts.org/public/data/covid-19/covid_deaths_usafacts.csv"
urlMapper[["usafPop"]] <- "https://static.usafacts.org/public/data/covid-19/covid_county_population_usafacts.csv"
readList <- list("usafCase"="./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20220308.csv",
"usafDeath"="./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20220308.csv"
)
compareList <- list("usafCase"=readFromRDS("cty_newdata_20220202")$dfRaw$usafCase,
"usafDeath"=readFromRDS("cty_newdata_20220202")$dfRaw$usafDeath
)
# Use existing clusters
cty_chkdata_20220308 <- readRunUSAFacts(maxDate="2022-03-06",
downloadTo=lapply(readList,
FUN=function(x) if(file.exists(x)) NA else x
),
readFrom=readList,
compareFile=compareList,
writeLog="./RInputFiles/Coronavirus/USAF_NewData_20220308_chk_v005.log",
ovrwriteLog=TRUE,
useClusters=readFromRDS("cty_newdata_20210813")$useClusters,
skipAssessmentPlots=FALSE,
brewPalette="Paired"
)
##
## No file has been downloaded, will use existing file: ./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20220308.csv
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character(),
## StateFIPS = col_character()
## )
## i Use `spec()` for the full column specifications.
##
## *** File has been checked for uniqueness by: countyFIPS countyName state stateFIPS
##
##
## *** File has been checked for uniqueness by: countyFIPS stateFIPS date
##
##
## Checking for similarity of: column names
## In reference but not in current:
## In current but not in reference:
##
## Checking for similarity of: date
## In reference but not in current: 0
## In current but not in reference: 32
## Detailed differences available in: ./RInputFiles/Coronavirus/USAF_NewData_20220308_chk_v005.log
##
## Checking for similarity of: county
## In reference but not in current:
## In current but not in reference:
##
##
## ***Differences of at least 5 and at least 5%
##
## 6 records
## Detailed output available in log: ./RInputFiles/Coronavirus/USAF_NewData_20220308_chk_v005.log
##
##
## ***Differences of at least 0 and at least 0.1%
##
## 228 records
## Detailed output available in log: ./RInputFiles/Coronavirus/USAF_NewData_20220308_chk_v005.log
##
##
## No file has been downloaded, will use existing file: ./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20220308.csv
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character(),
## StateFIPS = col_character()
## )
## i Use `spec()` for the full column specifications.
##
## *** File has been checked for uniqueness by: countyFIPS countyName state stateFIPS
##
##
## *** File has been checked for uniqueness by: countyFIPS stateFIPS date
##
##
## Checking for similarity of: column names
## In reference but not in current:
## In current but not in reference:
##
## Checking for similarity of: date
## In reference but not in current: 0
## In current but not in reference: 32
## Detailed differences available in: ./RInputFiles/Coronavirus/USAF_NewData_20220308_chk_v005.log
##
## Checking for similarity of: county
## In reference but not in current:
## In current but not in reference:
##
##
## ***Differences of at least 5 and at least 5%
##
## 44 records
## Detailed output available in log: ./RInputFiles/Coronavirus/USAF_NewData_20220308_chk_v005.log
##
##
## ***Differences of at least 0 and at least 0.1%
##
## 253 records
## Detailed output available in log: ./RInputFiles/Coronavirus/USAF_NewData_20220308_chk_v005.log
##
##
## Column sums before and after applying filtering rules:
## # A tibble: 3 x 4
## isType cases new_cases n
## <chr> <dbl> <dbl> <dbl>
## 1 before 1.92e+10 7.73e+7 2468189
## 2 after 1.90e+10 7.69e+7 2428766
## 3 pctchg 5.39e- 3 5.14e-3 0.0160
##
##
## Column sums before and after applying filtering rules:
## # A tibble: 3 x 4
## isType deaths new_deaths n
## <chr> <dbl> <dbl> <dbl>
## 1 before 3.27e+8 951208 2468189
## 2 after 3.20e+8 907057 2428766
## 3 pctchg 2.11e-2 0.0464 0.0160
## NULL
# Plot all counties based on closest cluster
sparseCountyClusterMap(cty_chkdata_20220308$useClusters,
caption="Includes only counties with 25k+ population",
brewPalette="viridis"
)
# Save the check file
saveToRDS(cty_chkdata_20220308, ovrWriteError=FALSE)
##
## File already exists: ./RInputFiles/Coronavirus/cty_chkdata_20220308.RDS
##
## Not replacing the existing file since ovrWrite=FALSE
## NULL
# Confirm that it is identical to the previous process
cty_newdata_20220308 <- readFromRDS("cty_newdata_20220308")
# Same names in the list
all.equal(names(cty_chkdata_20220308), names(cty_newdata_20220308))
## [1] TRUE
# Identical items in the list
sapply(names(cty_chkdata_20220308),
FUN=function(x) identical(cty_chkdata_20220308[[x]], cty_newdata_20220308[[x]])
)
## countyData dfRaw dfProcess dfPerCapita useClusters maxDate
## TRUE TRUE TRUE TRUE TRUE TRUE
## plotDataList
## FALSE
# ggplot2 objects are never identical due to environment; confirm they are all.equal
all.equal(cty_chkdata_20220308$plotDataList, cty_newdata_20220308$plotDataList)
## [1] TRUE
The capability for obtaining and processing county-level vaccines data is included:
# Read the relevant vaccines data
vaxPartialRaw_20220309 <- downloadCountyVaccines(loc="./RInputFiles/Coronavirus/county_vaccine_20220309.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## Date = col_character(),
## FIPS = col_character(),
## Recip_County = col_character(),
## Recip_State = col_character(),
## SVI_CTGY = col_character(),
## Metro_status = col_character()
## )
## i Use `spec()` for the full column specifications.
##
## Records from other than 50 states and DC:
## # A tibble: 9 x 2
## state n
## <chr> <int>
## 1 AS 442
## 2 FM 447
## 3 GU 897
## 4 MH 434
## 5 MP 444
## 6 PR 35625
## 7 PW 441
## 8 UNK 308
## 9 VI 1794
vaxPartialRaw_20220309
## # A tibble: 1,480,110 x 10
## date FIPS countyName state vxcpoppct vxcgte18pct vxcgte65pct pop
## <date> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2022-03-08 06101 Sutter County CA 60.3 73 87.7 96971
## 2 2022-03-08 12009 Brevard Coun~ FL 64 72.9 88.4 601942
## 3 2022-03-08 19085 Harrison Cou~ IA 51.6 62.1 88.9 14049
## 4 2022-03-08 06069 San Benito C~ CA 66 75.6 83.3 62808
## 5 2022-03-08 19057 Des Moines C~ IA 51.1 61.9 83.4 38967
## 6 2022-03-08 05073 Lafayette Co~ AR 41.2 48.5 57.4 6624
## 7 2022-03-08 16021 Boundary Cou~ ID 34.8 42.8 63.6 12245
## 8 2022-03-08 20073 Greenwood Co~ KS 49 59.6 80.6 5982
## 9 2022-03-08 33005 Cheshire Cou~ NH 61.6 69.1 89.6 76085
## 10 2022-03-08 37159 Rowan County NC 43.8 52.5 74.1 142088
## # ... with 1,480,100 more rows, and 2 more variables: popgte18 <dbl>,
## # popgte65 <dbl>
# Repair the data for 65+
vaxFix65_20220309 <- repairVaxPopulation(vaxPartialRaw_20220309, colsRepair=c("popgte65"))
##
## Count of NA records by column
## state FIPS popgte65_minpop popgte65_maxpop popgte65_nnA
## 0 0 0 0 0
## n
## 0
##
## Records where minimum and maximum population differ# A tibble: 0 x 5
## # ... with 5 variables: state <chr>, FIPS <chr>, age <chr>, minpop <dbl>,
## # maxpop <dbl>
vaxFix65_20220309
## # A tibble: 1,417,042 x 10
## date FIPS countyName state vxcpoppct vxcgte18pct vxcgte65pct pop
## <date> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2022-03-08 06101 Sutter County CA 60.3 73 87.7 96971
## 2 2022-03-08 12009 Brevard Coun~ FL 64 72.9 88.4 601942
## 3 2022-03-08 19085 Harrison Cou~ IA 51.6 62.1 88.9 14049
## 4 2022-03-08 06069 San Benito C~ CA 66 75.6 83.3 62808
## 5 2022-03-08 19057 Des Moines C~ IA 51.1 61.9 83.4 38967
## 6 2022-03-08 05073 Lafayette Co~ AR 41.2 48.5 57.4 6624
## 7 2022-03-08 16021 Boundary Cou~ ID 34.8 42.8 63.6 12245
## 8 2022-03-08 20073 Greenwood Co~ KS 49 59.6 80.6 5982
## 9 2022-03-08 33005 Cheshire Cou~ NH 61.6 69.1 89.6 76085
## 10 2022-03-08 37159 Rowan County NC 43.8 52.5 74.1 142088
## # ... with 1,417,032 more rows, and 2 more variables: popgte18 <dbl>,
## # popgte65 <dbl>
Correlations data can also be run:
corrList20220309 <- corrVaxBurden(lstCD=cty_newdata_20220308,
dfVax=vaxPartialRaw_20220309,
minDateCD=c("2021-11-01", "2021-09-01"),
maxDateCD="2022-02-28"
)
##
## Will run with parameters:
## burdenVar: cpm dpm
## vaxVar: vxcpoppct vxcpoppct
## minDateCD: 2021-11-01 2021-09-01
## maxDateCD: 2022-02-28 2022-02-28
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
##
## Call:
## lm(formula = get(burdenVar) ~ vaxMetric, data = dfReg, weights = pop)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -57553717 -2611410 -275030 2650218 123342871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68194.65 1887.85 36.12 <2e-16 ***
## vaxMetric 530.27 33.93 15.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7406000 on 3140 degrees of freedom
## Multiple R-squared: 0.07217, Adjusted R-squared: 0.07187
## F-statistic: 244.2 on 1 and 3140 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = get(burdenVar) ~ vaxMetric * type + 0 - vaxMetric,
## data = dfReg, weights = pop)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -57474876 -2595922 -291957 2584956 122543573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## type<25k 58253.65 6207.86 9.384 < 2e-16 ***
## type>500k 66890.28 3631.97 18.417 < 2e-16 ***
## type100k-500k 72662.44 3757.76 19.337 < 2e-16 ***
## type25k-100k 65791.22 4409.38 14.921 < 2e-16 ***
## vaxMetric:type<25k 731.64 145.96 5.013 5.67e-07 ***
## vaxMetric:type>500k 553.88 60.14 9.210 < 2e-16 ***
## vaxMetric:type100k-500k 429.71 69.68 6.167 7.84e-10 ***
## vaxMetric:type25k-100k 628.19 96.06 6.540 7.17e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7400000 on 3134 degrees of freedom
## Multiple R-squared: 0.9476, Adjusted R-squared: 0.9474
## F-statistic: 7081 on 8 and 3134 DF, p-value: < 2.2e-16
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
##
## Call:
## lm(formula = get(burdenVar) ~ vaxMetric, data = dfReg, weights = pop)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1371927 -23067 44651 121402 729890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1254.2057 22.8836 54.81 <2e-16 ***
## vaxMetric -9.5890 0.4779 -20.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 160100 on 3140 degrees of freedom
## Multiple R-squared: 0.1136, Adjusted R-squared: 0.1134
## F-statistic: 402.6 on 1 and 3140 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = get(burdenVar) ~ vaxMetric * type + 0 - vaxMetric,
## data = dfReg, weights = pop)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -885434 -65276 -4779 67115 1060610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## type<25k 1760.1655 74.0739 23.762 < 2e-16 ***
## type>500k 758.0391 29.0606 26.085 < 2e-16 ***
## type100k-500k 1304.8595 41.8238 31.199 < 2e-16 ***
## type25k-100k 1668.2572 50.2491 33.200 < 2e-16 ***
## vaxMetric:type<25k -11.1910 2.0925 -5.348 9.52e-08 ***
## vaxMetric:type>500k -3.2769 0.5600 -5.851 5.38e-09 ***
## vaxMetric:type100k-500k -9.4838 0.8902 -10.653 < 2e-16 ***
## vaxMetric:type25k-100k -11.0196 1.2902 -8.541 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 140000 on 3134 degrees of freedom
## Multiple R-squared: 0.8063, Adjusted R-squared: 0.8058
## F-statistic: 1631 on 8 and 3134 DF, p-value: < 2.2e-16
corrList20220309
## [[1]]
## # A tibble: 3,142 x 10
## fips countyName pop state vxcpoppct vxcgte18pct vxcgte65pct cpm dpm
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42129 Westmorela~ 348899 PA 54.2 63.2 81.2 96676. 1238.
## 2 39109 Miami Coun~ 106987 OH 42.8 53 77.6 91619. 1318.
## 3 16045 Gem County 18112 ID 34.5 44.7 71 44611. 939.
## 4 05071 Johnson Co~ 26578 AR 44.4 54.8 67.7 74761. 452.
## 5 48411 San Saba C~ 6055 TX 32.7 40 54.4 38811. 991.
## 6 51710 Norfolk ci~ 242742 VA 48.2 56.7 79.4 69044. 470.
## 7 27007 Beltrami C~ 47188 MN 52.5 64.7 91.4 94982. 805.
## 8 05067 Jackson Co~ 16719 AR 34.4 41.2 66.9 98870. 658.
## 9 51735 Poquoson c~ 12271 VA 43.5 52.8 69.7 79945. 652.
## 10 47047 Fayette Co~ 41133 TN 49.2 57.9 76.3 97610. 1386.
## # ... with 3,132 more rows, and 1 more variable: type <chr>
##
## [[2]]
## # A tibble: 3,142 x 10
## fips countyName pop state vxcpoppct vxcgte18pct vxcgte65pct cpm dpm
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 51710 Norfolk ci~ 242742 VA 39.4 46.7 68.7 8.43e4 704.
## 2 01127 Walker Cou~ 63521 AL 32.7 41 68.1 1.46e5 1810.
## 3 28153 Wayne Coun~ 20183 MS 27.1 34.7 58.2 1.06e5 1288.
## 4 26045 Eaton Coun~ 110268 MI 47.4 56.5 77.4 1.41e5 1533.
## 5 48313 Madison Co~ 14284 TX 0 0 0 1.00e5 1050.
## 6 39095 Lucas Coun~ 428348 OH 39.2 48.2 67.2 1.22e5 1158.
## 7 29153 Ozark Coun~ 9174 MO 24.7 30 41 7.60e4 3052.
## 8 29071 Franklin C~ 103967 MO 44.5 55 81.4 9.62e4 1212.
## 9 21149 McLean Cou~ 9207 KY 40.9 51.6 76.7 1.56e5 1629.
## 10 13233 Polk County 42613 GA 9.4 11.7 9.8 9.84e4 1877.
## # ... with 3,132 more rows, and 1 more variable: type <chr>
Comparisons can be run between summed county and state data:
statePerCapita <- readFromRDS("cdc_daily_220304")$dfPerCapita
tempStateCompareList <- compareStateSummedCounty(dfState=statePerCapita,
dfCounty=cty_newdata_20220308$dfPerCapita,
aggData=TRUE,
dateThru="2022-02-28",
returnData=TRUE
)
## Warning: Removed 6 row(s) containing missing values (geom_path).
tempStateCompareList
## $dfState
## # A tibble: 3,160 x 6
## date name src value value7 state
## <date> <chr> <chr> <dbl> <dbl> <chr>
## 1 2020-01-01 cases state 0 NA Aggregated
## 2 2020-01-01 deaths state 0 NA Aggregated
## 3 2020-01-01 new_cases state 0 NA Aggregated
## 4 2020-01-01 new_deaths state 0 NA Aggregated
## 5 2020-01-02 cases state 0 NA Aggregated
## 6 2020-01-02 deaths state 0 NA Aggregated
## 7 2020-01-02 new_cases state 0 NA Aggregated
## 8 2020-01-02 new_deaths state 0 NA Aggregated
## 9 2020-01-03 cases state 0 NA Aggregated
## 10 2020-01-03 deaths state 0 NA Aggregated
## # ... with 3,150 more rows
##
## $dfCounty
## # A tibble: 3,052 x 6
## date name src value value7 state
## <date> <chr> <chr> <dbl> <dbl> <chr>
## 1 2020-01-25 cases county 751 750. Aggregated
## 2 2020-01-25 deaths county 1 1 Aggregated
## 3 2020-01-25 new_cases county 10 111. Aggregated
## 4 2020-01-25 new_deaths county 0 0.143 Aggregated
## 5 2020-01-26 cases county 759 758. Aggregated
## 6 2020-01-26 deaths county 1 1 Aggregated
## 7 2020-01-26 new_cases county 8 8 Aggregated
## 8 2020-01-26 new_deaths county 0 0 Aggregated
## 9 2020-01-27 cases county 769 766. Aggregated
## 10 2020-01-27 deaths county 1 1 Aggregated
## # ... with 3,042 more rows
The scoring metric is converted to functional form:
# Data for score similarity process
tempStateCompareList_v2 <- compareStateSummedCounty(dfState=statePerCapita,
dfCounty=cty_newdata_20220308$dfPerCapita,
inclStates=c(state.abb, "DC"),
dateThru="2022-02-28",
makePlot=FALSE,
returnData=TRUE
)
scoreSimilarity(tempStateCompareList_v2, minDate="2020-02-15", maxDate="2022-02-15", makeFacet=FALSE)
# Example states with meaningful disconnects on 1+ metrics
compareStateSummedCounty(dfState=statePerCapita,
dfCounty=cty_newdata_20220308$dfPerCapita,
inclStates=c("FL", "MO", "OK", "TX", "ME", "NE", "KY", "AL", "GA"),
dateThru="2022-02-28",
makePlot=TRUE,
returnData=FALSE
)
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
## Warning: Removed 6 row(s) containing missing values (geom_path).
While cumulative deaths and cumulative cases are generally well aligned between sources, rolling 7-day deaths and cases are frequently divergent by source.
An integrated vaccines dataset can be created:
allState_20220309 <- integrateStateVaccine(vaxFix65_20220309, statePerCap=statePerCapita)
allState_20220309
## # A tibble: 23,001 x 11
## state date vxcpoppct vxcgte18pct vxcgte65pct ctypoppct ctygte18pct
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AK 2020-12-13 NA NA NA 0 0
## 2 AL 2020-12-13 NA NA NA 0 0
## 3 AR 2020-12-13 NA NA NA 0 0
## 4 AZ 2020-12-13 NA NA NA 0 0
## 5 CA 2020-12-13 NA NA NA 0 0
## 6 CO 2020-12-13 NA NA NA 0 0
## 7 CT 2020-12-13 NA NA NA 0 0
## 8 DC 2020-12-13 NA NA NA 0 0
## 9 DE 2020-12-13 NA NA NA 0 0
## 10 FL 2020-12-13 NA NA NA 0 0
## # ... with 22,991 more rows, and 4 more variables: ctygte65pct <dbl>,
## # pop <dbl>, popgte18 <dbl>, popgte65 <dbl>
Functionality for exploring vaccine evolution is included:
# Example for a single state
stateAgeVaxEvolution(allState_20220309, keyState="FL", minDate="2020-12-15", returnData=TRUE)
## Warning: Removed 5 row(s) containing missing values (geom_path).
## # A tibble: 2,694 x 6
## state date name value src age
## <chr> <date> <chr> <dbl> <chr> <chr>
## 1 FL 2020-12-15 vxcpoppct 0 State All
## 2 FL 2020-12-15 vxcgte18pct 0 State 18+
## 3 FL 2020-12-15 vxcgte65pct 0 State 65+
## 4 FL 2020-12-15 ctypoppct 0 Summed County All
## 5 FL 2020-12-15 ctygte18pct 0 Summed County 18+
## 6 FL 2020-12-15 ctygte65pct 0 Summed County 65+
## 7 FL 2020-12-16 vxcpoppct 0 State All
## 8 FL 2020-12-16 vxcgte18pct 0 State 18+
## 9 FL 2020-12-16 vxcgte65pct 0 State 65+
## 10 FL 2020-12-16 ctypoppct 0 Summed County All
## # ... with 2,684 more rows
# Example for multiple states without plotting
stateAgeVaxEvolution(allState_20220309, keyState=c("CA", "FL", "TX", "NY", "PA", "IL"), createPlot=FALSE)
## # A tibble: 16,236 x 6
## state date name value src age
## <chr> <date> <chr> <dbl> <chr> <chr>
## 1 CA 2020-12-13 vxcpoppct NA State All
## 2 CA 2020-12-13 vxcgte18pct NA State 18+
## 3 CA 2020-12-13 vxcgte65pct NA State 65+
## 4 CA 2020-12-13 ctypoppct 0 Summed County All
## 5 CA 2020-12-13 ctygte18pct 0 Summed County 18+
## 6 CA 2020-12-13 ctygte65pct 0 Summed County 65+
## 7 FL 2020-12-13 vxcpoppct NA State All
## 8 FL 2020-12-13 vxcgte18pct NA State 18+
## 9 FL 2020-12-13 vxcgte65pct NA State 65+
## 10 FL 2020-12-13 ctypoppct 0 Summed County All
## # ... with 16,226 more rows
# Example for multiple states with plotting
stateAgeVaxEvolution(allState_20220309, keyState=c("CT", "AR", "AZ"), minDate="2020-12-15", returnData=TRUE)
## Warning: Removed 5 row(s) containing missing values (geom_path).
## # A tibble: 8,082 x 6
## state date name value src age
## <chr> <date> <chr> <dbl> <chr> <chr>
## 1 AR 2020-12-15 vxcpoppct 0 State All
## 2 AR 2020-12-15 vxcgte18pct 0 State 18+
## 3 AR 2020-12-15 vxcgte65pct 0 State 65+
## 4 AR 2020-12-15 ctypoppct 0 Summed County All
## 5 AR 2020-12-15 ctygte18pct 0 Summed County 18+
## 6 AR 2020-12-15 ctygte65pct 0 Summed County 65+
## 7 AZ 2020-12-15 vxcpoppct 0 State All
## 8 AZ 2020-12-15 vxcgte18pct 0 State 18+
## 9 AZ 2020-12-15 vxcgte65pct 0 State 65+
## 10 AZ 2020-12-15 ctypoppct 0 Summed County All
## # ... with 8,072 more rows
Scores can be created for every state, reflecting differences in the vaccination data:
scoreVaxSimilarity(allState_20220309)
scoreVaxSimilarity(allState_20220309, minDate="2021-12-01", maxDate="2022-02-28", returnBaseData=TRUE)
## # A tibble: 459 x 7
## state ym age n rmse cdcState ctySum
## <chr> <chr> <chr> <int> <dbl> <dbl> <dbl>
## 1 AK 2021-12 18+ 31 2.30 67.2 65.0
## 2 AK 2021-12 65+ 31 2.25 83.9 81.7
## 3 AK 2021-12 All 31 1.84 55.6 53.7
## 4 AK 2022-01 18+ 31 2.48 68.6 66.2
## 5 AK 2022-01 65+ 31 2.35 84.4 82.1
## 6 AK 2022-01 All 31 1.98 57.3 55.4
## 7 AK 2022-02 18+ 28 1.88 71.4 69.6
## 8 AK 2022-02 65+ 28 1.86 85.0 83.2
## 9 AK 2022-02 All 28 1.48 60.2 58.7
## 10 AL 2021-12 18+ 31 4.26 57.2 53.0
## # ... with 449 more rows
stateAgeVaxEvolution(allState_20220309,
keyState=c("HI", "TX", "VA", "GA", "CO", "WV", "VT"),
createPlot = TRUE
)
## Warning: Removed 6 row(s) containing missing values (geom_path).
County-level burden process mapping is included:
makeBurdenSummary <- function(lst,
groupVar=c("countyFIPS", "state"),
numVarFinal=c("tdpm", "tcpm"),
numVarSum=c("dpm", "cpm"),
keyDate=NULL,
dateRange=28
) {
# FUNCTION ARGUMENTS
# lst: list of processed county burden data
# groupVar: grouping variables for the final dataset
# numVarFinal: numeric variables to pull data from the key date
# numVarSum: numeric variables to sum from the key date interval
# keyDate: the key date for the summaries (NULL means use maximum in data)
# dateRange: number of days to include in the numeric interval summaries
# Find keyDate if not provided, convert to date if not already
if(is.null(keyDate)) keyDate <- lst[["dfPerCapita"]] %>% pull(date) %>% max()
if(!("Date" %in% class(keyDate))) keyDate <- as.Date(keyDate)
# Create summary
df <- lst[["dfPerCapita"]] %>%
group_by_at(all_of(groupVar)) %>%
summarize(asofDate=keyDate,
across(all_of(numVarFinal), .fns=~sum(ifelse(date==keyDate, .x, 0))),
across(all_of(numVarSum),
.fns=~sum(ifelse(date>keyDate-dateRange & date<=keyDate, .x, 0)),
.names=paste0("{.col}", as.character(dateRange))
),
.groups="drop"
)
# Return the data frame
df
}
createBurdenCountyDate <- function(lst,
maxDate,
rollBy=months(c(0, -2, -4, -6)),
dateSpan=35
) {
# FUNCTION ARGUMENTS:
# lst: processed county-level list data
# maxDate: the latest data to use in the data
# rollBy: time to roll from maxDate
# dateSpan: number of days in the reporting interval
as.Date(maxDate) %>%
lubridate::add_with_rollback(rollBy) %>%
purrr::map_dfr(.f=~makeBurdenSummary(lst, keyDate=.x, dateRange=dateSpan))
}
makeBurdenDatePlot <- function(df,
keyVar,
timeLabel,
plotTitle=NULL,
varLabel=NULL,
varFloor=0,
varCeiling=+Inf,
varDivBy=1,
vecRename=c("countyFIPS"="fips")
) {
# FUNCTION ARGUMENTS:
# df: a processed data frame with fips, asofDate, burden
# keyVar: character string for variable to be plotted
# timeLabel: character string for amount of time (e.g., "1-month" or "5-week")
# plotTitle: title for the plot (NULL means infer from other arguments)
# varLabel: label for the variable in plot scale (NULL means infer from other arguments)
# varFloor: minimum value to be allowed for variable (-Inf means no floor applied)
# varCeiling: maximum value to be allowed for variable (Inf means no ceiling applied)
# varDivBy: variable should be divivded by this for plotting
# vecRename: renaming vector to get desired variables in frame
# Create varLabel if passed as NULL
if(is.null(varLabel)) {
varLabel <- stringr::str_to_upper(stringr::str_extract(keyVar, "^[A-Za-z]*"))
if((varDivBy > 1) & isTRUE(all.equal(log10(varDivBy) %% 1, 0)))
varLabel <- paste0(varLabel, "(", stringr::str_replace(varDivBy, pattern="1", replacement=""), "s)")
else if (varDivBy != 1) varLabel <- paste0(varLabel, "(units of ", varDivBy, ")")
}
# Create plotTitle if passed as NULL
if(is.null(plotTitle))
plotTitle <- paste0(timeLabel,
" coronavirus ",
if(str_detect(stringr::str_to_upper(keyVar), pattern="CPM")) "cases" else "deaths",
" by county"
)
# Create and return plot
df %>%
colRenamer(vecRename=vecRename) %>%
mutate(burden=pmax(pmin(get(all_of(keyVar)), varCeiling), varFloor)/varDivBy) %>%
select(fips, burden, asofDate) %>%
usmap::plot_usmap(regions="counties", data=., values="burden") +
labs(title=plotTitle,
subtitle=if(varFloor > -Inf | varCeiling < +Inf) "Floors and/or ceilings applied" else NULL,
caption="Source: USA Facts"
) +
scale_fill_continuous(paste0(varLabel, "\n", timeLabel), low="grey", high="red") +
facet_wrap(~asofDate) +
theme(legend.position="bottom")
}
dfRoll20220308 <- createBurdenCountyDate(cty_newdata_20220308,
maxDate="2022-02-28",
rollBy=months(c(0, -3, -6, -9)),
dateSpan=91
)
dfRoll20220308
## # A tibble: 12,568 x 7
## countyFIPS state asofDate tdpm tcpm dpm91 cpm91
## <chr> <chr> <date> <dbl> <dbl> <dbl> <dbl>
## 1 01001 AL 2022-02-28 3472. 277614. 662. 89370.
## 2 01003 AL 2022-02-28 2867. 246280. 228. 75674.
## 3 01005 AL 2022-02-28 3767. 220570. 527. 70890.
## 4 01007 AL 2022-02-28 4421. 284674. 223. 90873.
## 5 01009 AL 2022-02-28 3770. 255249. 450. 69917.
## 6 01011 AL 2022-02-28 4851. 225621. 396. 74547.
## 7 01013 AL 2022-02-28 6119. 258896. 977. 82271.
## 8 01015 AL 2022-02-28 5176. 278491. 616. 79680.
## 9 01017 AL 2022-02-28 4571. 253533. 301. 79599.
## 10 01019 AL 2022-02-28 2978. 193694. 573. 72377.
## # ... with 12,558 more rows
makeBurdenDatePlot(dfRoll20220308, keyVar="cpm91", timeLabel="3-month", varCeiling=100000, varDivBy=1000)
makeBurdenDatePlot(dfRoll20220308, keyVar="dpm91", timeLabel="3-month", varCeiling=1500)